from bs4 import BeautifulSoup
import requests
import time
import re


def get_all_url():
    """Walk the listing pages one after another, starting from page 8.

    Each call to ``get_url`` processes one page and hands back the address of
    the following page; the walk stops as soon as a falsy value comes back.
    A one-second pause follows every page, and an additional 100-second pause
    is taken after every tenth page.
    """
    # The crawl can also be started from 'https://www.bancaiwang.cn/buy/'.
    page_url = get_url('https://www.bancaiwang.cn/buy/index-htm-page-8.html')
    pages_done = 0
    while page_url:
        page_url = get_url(page_url)
        pages_done += 1
        time.sleep(1)
        print(page_url)
        if not pages_done % 10:
            # Longer back-off after each batch of ten pages.
            time.sleep(100)


def get_url(url):
    """Scrape one listing page and return the URL of the next page.

    Downloads ``url``, decodes the body as GBK (undecodable bytes are
    dropped) and appends to ``bancaiwang.txt``, one per line, the ``href`` of
    the first link found inside every ``<span class="fcolor_0 sp1 pu_co">``.
    Spans that contain no link are skipped.

    Args:
        url: Address of the listing page to fetch.

    Returns:
        The absolute URL of the page referenced by the anchor whose text
        matches the next-page pattern, or ``None`` when the page has no such
        anchor (or its ``href`` is empty).

    Raises:
        requests.RequestException: If the page cannot be downloaded within
            the timeout.
    """
    from urllib.parse import urljoin

    # A timeout keeps a stalled connection from hanging the crawl forever.
    html = requests.get(url, timeout=30).content.decode('gbk', 'ignore')
    # Name the parser explicitly so results do not depend on which optional
    # parsers happen to be installed.
    soup = BeautifulSoup(html, 'html.parser')

    with open('bancaiwang.txt', 'a', encoding='utf-8') as out:
        for span in soup.find_all('span', class_='fcolor_0 sp1 pu_co'):
            link = span.find('a', href=True)
            if link is not None:
                out.write(link['href'] + "\n")

    next_link = soup.find('a', text=re.compile(r"下一页"), href=True)
    if next_link is None:
        # Last page: there is nothing further to follow.
        return None
    next_href = next_link['href']
    print(next_href)
    if not next_href:
        return None
    # urljoin resolves root-relative, page-relative and absolute hrefs alike.
    return urljoin(url, next_href)

def _fetch_detail(url):
    """Return the ``title&&&content`` record for ``url``, or None if unavailable."""
    try:
        # A timeout keeps a stalled connection from hanging the crawl forever.
        response = requests.get(url, timeout=30)
    except requests.RequestException as err:
        print('request failed for %s: %s' % (url, err))
        return None
    # Name the parser explicitly so results do not depend on which optional
    # parsers happen to be installed.
    soup = BeautifulSoup(response.content.decode('utf-8', 'ignore'),
                         'html.parser')
    title = soup.find('h1', id='title')
    content = soup.find('div', id='content')
    if title is None or content is None:
        print('title or content missing in %s' % url)
        return None
    return title.get_text().strip() + '&&&' + content.get_text().strip()


def get_detail():
    """Fetch every URL listed in ``aicaigou.txt`` and save its title and body.

    For each non-blank line of ``aicaigou.txt`` the page is downloaded and
    decoded as UTF-8 (undecodable bytes are dropped). The stripped text of
    ``<h1 id="title">`` and ``<div id="content">`` is joined with ``&&&`` and
    appended as one record to ``aicaigou_detail.txt``. Pages that cannot be
    downloaded, or that lack either element, are reported on stdout and
    skipped so that one bad URL does not abort the whole run. The function
    sleeps one second after every request.
    """
    with open('aicaigou.txt', 'r', encoding='utf-8') as src, \
            open('aicaigou_detail.txt', 'a', encoding='utf-8') as out:
        for line in src:
            url = line.strip()
            if not url:
                # Blank lines would make requests raise on an empty URL.
                continue
            record = _fetch_detail(url)
            if record is not None:
                print(url)
                print(record)
                out.write(record)
                out.write("\n")
            time.sleep(1)



if __name__ == "__main__":
    # Only the detail-fetching stage runs by default. The calls below are
    # alternative entry points that can be enabled by hand:
    #   get_url('https://s.1688.com/selloffer/offer_search.htm?keywords=%BD%C5%B6%D5&n=y&netType=1%2C11%2C16')
    #   get_all_url()
    get_detail()